# This file contains the R code that was used to implement the estimators and functionals in #"Transformations In Hazard Rate Estimation For Heavy-Tailed Data", IFAC proceesings, Springer, (2013) # As a fully working example the code herein can estimate the hazard rate from the Danish Fire Loss (DFL) data . # More general use of the estimates is also feasible by supplying the corresponding data set. #Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function. "Epanechnikov"<- function(x){ #Epanechikov kernel, square root of 5 is taken as 2.236068 (to save comp. time). ifelse(abs(x)< 2.236068, (3/4)*(1-((x^2)/5 ))/2.236068, ifelse(abs(x)>2.236068, 0,0)) } "IntEpanechnikov"<-function(x) { ifelse(x< -2.236068, 0, ifelse(x> 2.236068, 1, .5- (2.236068*x*(x^2-15))/100)) } "HigherOrder"<-function(x){ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1) * (3-x^2)/2))}#, ifelse(abs(x) > 4, 0, 0)) } "Thre1"<- #Watson - Leadbetter hazard rate (see Phd) function(xin, xout, kfun, bandfactor) { n<- length(xin) n1<-1:n xin.use<-sort(xin) h<-bandfactor*diff(quantile(xin.use, c(.25, .75)))#NOTE: It doesn't matter if I use xin.use or xin in the interquartile. arg1<-(sapply(xout, "-", xin.use))/h arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2)* 1/h arg3 } "iThre1"<- #Watson - Leadbetter integrated hazard rate (cum. haz. est.) function(xin, xout, kfun, bandfactor) { n<- length(xin) n1<-1:n xin.use<-sort(xin) h<-bandfactor*diff(quantile(xin.use, c(.25, .75))) arg1<-(sapply(xout, "-", xin.use))/h arg2<-kfun(arg1)* 1/(n-n1+1) arg3<-colSums(arg2) arg3 } "SimpsonInt"<- function(xin, h) { #xin = data, h=distance of the points at which function is evaluated n<-length(xin) nn<-1:n int0<-xin[1] int2n<-xin[n] inteven<-xin[seq(3,n-1,by=2)] intodd<-xin[seq(2,n-2,by=2)] sumodd<-sum(intodd) sumeven<-sum(inteven) res<-(h/3)*(int0+4*sumodd+2*sumeven+int2n) res } "PilotWidth1"<- function(sample) { #returns Silverman's default width 1.06 A n^(-1/5) n<-length(sample) StD<-sd(sample) IqR<-diff(quantile(sample, c(.25, .75))) tmp<-min(StD, IqR/1.34) DefWidth<- 1.06*tmp*n^(-1/5) DefWidth } "LambdaHat"<- #Watson & Leadbetter estimator (Hazard Analysis I, Biometrika (1964)) # -order statistics version. function(xin, xout, h, kfun){ n<- length(xin) n1<-1:n nn<-length(xout) xin.use<-sort(xin) #cat(xin.use) hazard.rate<-vector("numeric",nn) for(i in 1:nn){ hazard.rate[i]<-(1/h)*sum((kfun((xout[i]-xin.use)/h))/(n-n1+1)) } hazard.rate } "BandsIkde"<-function(xin) #returns default bandwidths for the variable bandwidth distribution function estimate: Swanepoel and Van Graan (2007) #d1 is the pilot bandwidth and d2 the adaptive { IqR<-diff(quantile(xin, c(.25, .75))) StD<-sd(xin) tmp<-min(StD, IqR/1.34) n<-length(xin) d2<- (375*sqrt(3) / (28*pi))^(1/7) *tmp^(4/7) * n^(1/7) d1 <-3.9 * tmp * n^(1/3) c(d1,d2) } "kde"<- function(xin, xout, h, kfun) { n<- length(xin) xin<-sort(xin) arg1<-(sapply(xout, "-", xin))/h #xin.use arg2<-kfun(arg1) arg3<-colSums(arg2) arg3/n } "iikde"<- function(xin, xout, kfun, h) { n<- length(xin) arg1<-(sapply(xout, "-", xin))/h arg2<-kfun(arg1) arg3<-colSums(arg2) arg3/n } "Ikde2"<- function(xin, xout, ikfun) { n<- length(xin) Bands<-BandsIkde(xin) PilotBand<-Bands[1]/1000 AdaptiveBand<-Bands[2]/500 xin<-sort(xin) kernel<-iikde(xin, xin, ikfun, PilotBand) kernelxout<- iikde(xin, xout, ikfun, PilotBand) arg1<-(sapply(kernelxout, "-", kernel))/AdaptiveBand #xin.use arg2<-ikfun(arg1) arg3<-colSums(arg2) arg3/n } "TransIkde"<- #Adaptive variable bandwidth distribution function estimator function(xin, xout, kfun=Epanechnikov) { n<- length(xin) xin<-sort(xin) nn<-length(xout) Bands<-BandsIkde(xin) PilotBand<-Bands[1] AdaptiveBand<-Bands[2] kernel<-kde(xin, xin, PilotBand, kfun) kernelxout<- kde(xin, xout, PilotBand, kfun) vdfe<-sapply(1:nn, function(i, kernel, kernelxout, AdaptiveBand, kfun) { sum( kfun( (kernelxout[i]-kernel)/AdaptiveBand )) }, kernel, kernelxout, AdaptiveBand, kfun) vdfe/n } "ikde"<- function(xin, xout, kfun, bandfactor) { n<- length(xin) h<-bandfactor*diff(quantile(xin, c(.25, .75))) arg1<-(sapply(xout, "-", xin))/h arg2<-kfun(arg1) arg3<-colSums(arg2) arg3/n } "Thre2.1"<- #Transformed hazard rate estimator (2nd stage) using nonparametric transformation -log(1-\hat F(x)) function(xin, xout, h2, kfun, ikfun, bandfactor) { n<-length(xin) n1<-1:n nn<-length(xout) xin.use<-sort(xin) GofX<- -logb(1- Ikde2(xin.use, xout, ikfun), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xout, igaussian) #pchisq(xout, 12), base=exp(1)) # Tsample<- -logb(1- Ikde2(xin.use, xin.use, ikfun), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xin.use, igaussian) #pchisq(xin.use, 12), base=exp(1))# GofXdashed<- Thre1(xin.use, xout, kfun, bandfactor) #ChiHaz(xout, 10)# arg1<-(sapply(GofX, "-", Tsample))/h2 arg2<-kfun(arg1)/(n-n1+1) arg3<-colSums(arg2) TransEstimate<- arg3*GofXdashed/h2 TransEstimate } "VarHre"<- #Adaptive variable bandwidth hazard rate estimator function(xin, xout, h2, bandfactor, kfun=Epanechnikov) { n<- length(xin) nn<-length(xout) n1<-1:n xin.use<-sort(xin) kernel<-Thre1(xin.use, xin.use, kfun, bandfactor) #Pilot estimate (W-L) SqrtKernel<-sqrt(kernel) vhre<-sapply(1:nn, function(i, xin.use, xout, h2, kfun, n, n1, SqrtKernel) { sum( (SqrtKernel * kfun( ((xout[i]-xin.use)/h2) * SqrtKernel) )/(n-n1+1)) }, xin.use, xout, h2, kfun, n, n1, SqrtKernel) vhre/h2 } mydata<-unlist(read.csv("DFL.csv")) #mydata<-mydata[,2] #mydata<-sample(mydata, 10000) xout<-seq(min(mydata), max(mydata), length=200) xout<-quantile(xout, c(0.05, 0.95)) xout<-seq(min(xout), max(xout), length=200) #x<-seq(0, 5,length=100) #true.hazard<-hfrechet(x, 1,1,0) #Mat1<-matrix(nrow=100, ncol=20) #Mat2<-matrix(nrow=100, ncol=20) #for(i in 1:20) #{ #x1<- rfrechet(200, 1, 1,0) #arg1<-Thre2(x1, x, Epanechnikov, IntEpanechnikov, .2) #Mat1[,i]<-arg1 huse<-PilotWidth1(mydata) arg2<-Thre2.1(mydata,xout,huse, Epanechnikov, IntEpanechnikov, 0.2)# VarHre(mydata,xout,huse,0.2,Epanechnikov)# LambdaHat(mydata, xout, huse, Epanechnikov) # Thre2.1(mydata, xout, Epanechnikov, IntEpanechnikov, .2) #Mat2[,i]<-arg2 #} #arg1use<-rowMeans(arg1) #arg2use<-rowMeans(arg2) plot(xout, arg2, type="l", xlab="x", ylab="hazard rate") #lines(x, arg1use, col=2) #lines(x, arg2use, lty=3, col=3) #plot(x, pfrechet(x,1,1,0), type="l") #lines(x, Ikde2(x1, x, IntEpanechnikov), col=2) #Ikde2(x1, x, IntEpanechnikov) #lines(x, iikde(x1, x, IntEpanechnikov, BandsIkde(x1)[1]/1020))